Economics

Correlation Heatmap

First we have some data wrangle to do. Here, we choose a selection of 35 variables to work with. We also filter for the Connecticut governing regions rather than counties. Finally, we arrange the variables in sensible order so they appear in similar blocks on the correlation plot.

Code
pacman::p_load(
  dplyr,
  tidyr,
  tibble,
  stringr,
  purrr,
  tidyr
)

conflicted::conflicts_prefer(
  dplyr::filter(),
  dplyr::select(),
  .quiet = TRUE
)
source('dev/data_pipeline_functions.R')
source('dev/filter_fips.R')
metrics <- readRDS('data/sm_data.rds')[['metrics']]
metadata <- readRDS('data/sm_data.rds')[['metadata']]

# Use metadata to get help filter by dimension
econ_meta <- metadata %>% 
  filter(dimension == 'economics')

# Filter to economics dimension
econ_metrics <- metrics %>% 
  filter(variable_name %in% econ_meta$variable_name)

# Filter to latest year and new (post-2024) counties
# Also remove NAICS variables to leave us with an approachable number
# And pivot wider so it is easier to get correlations
econ_metrics_latest <- econ_metrics %>%
  filter_fips(scope = 'new') %>% 
  get_latest_year() %>% 
  filter(str_detect(variable_name, 'Naics|^lq|^avgEmpLvl', negate = TRUE))

# Pivot wider for easier correlations below
econ_metrics_latest <- econ_metrics_latest %>% 
  select(fips, variable_name, value) %>% 
  unique() %>% 
  pivot_wider(
    names_from = 'variable_name',
    values_from = 'value'
  ) %>% 
  unnest(!fips)

# Arrange in some reasonable order
econ_metrics_latest <- econ_metrics_latest %>% 
  select(
    matches('Code_|metro'),
    matches('employ|abor|Worker'), # employment
    matches('Sales'),
    matches('Earn|Income'),
    everything(),
    -fips,
    -matches('expHiredLaborPercOpExp')
  )

Now we can build an interactive correlation plot. We are calculating correlations based on complete pairwise observations to account for missing data and the changes to Census Bureau reporting for Connecticut.

This is a reminder to do a deeper dive on missing data once more of the data have been wrangled. There is a substantial amount given the varying scales at which data are collected, not to mention the issues at the county level with Connecticut.

Code
pacman::p_load(
  dplyr,
  ggplot2,
  plotly,
  reshape,
  Hmisc,
  viridisLite
)

# Make a correlation matrix using all the selected variables
cor <- econ_metrics_latest %>% 
  as.matrix() %>% 
  rcorr()

# Melt correlation values and rename columns
cor_r <- melt(cor$r) %>% 
  setNames(c('var_1', 'var_2', 'value'))

# Save p values
cor_p <- melt(cor$P)
p.value <- cor_p$value

# Make heatmap with custom text aesthetic for tooltip
plot <- cor_r %>% 
  ggplot(aes(var_1, var_2, fill = value, text = paste0(
  'Var 1: ', var_1, '\n',
  'Var 2: ', var_2, '\n',
  'Correlation: ', format(round(value, 3), nsmall = 3), '\n',
  'P-Value: ', format(round(p.value, 3), nsmall = 3)
))) + 
  geom_tile() + 
  scale_fill_viridis_c() + 
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  labs(
    x = NULL,
    y = NULL,
    fill = 'Correlation'
  )

# Convert to interactive plotly figure with text tooltip
ggplotly(
  plot, 
  tooltip = 'text',
  width = 1000,
  height = 800
)

Interactive Correlation Plot

Cladogram

Show Wiltshire framework - what have we covered, what have we added.

Code
pacman::p_load(
  ggtree,
  dplyr,
  ape,
  data.tree,
  viridisLite,
  stringr
)

## Load data and add an origin level
dat <- readRDS('data/tree_dat.rds') %>% 
  filter(Dimension == 'Economics') %>% 
  mutate(Framework = 'Sustainability') %>% 
  select(Framework, Dimension:Indicator) %>% 
  mutate(across(
    everything(), 
    ~ str_trim(str_replace_all(., ';|%|/|\\.|\"|,|\\(|\\)', '_'))
  ))

dat$pathString <- paste(
  dat$Framework,
  dat$Dimension,
  dat$Index,
  dat$Indicator,
  sep = '/'
)
tree <- as.Node(dat)

# Convert the data.tree structure to Newick format
tree_newick <- ToNewick(tree)

# Read the Newick tree into ape
phylo_tree <- read.tree(text = tree_newick)

# Make all edge lengths 1
phylo_tree$edge.length <- rep(1, length(phylo_tree$edge.length))

# Add a space to end of node labels so it isn't cut off
phylo_tree$node.label <- paste0(phylo_tree$node.label, ' ')

# Plot it
plot(
  phylo_tree, 
  type = 'c',
  cex = 0.75,
  edge.width = 2,
  show.tip.label = TRUE,
  label.offset = 0,
  no.margin = TRUE,
  tip.color = 'black',
  edge.color = viridis(181),
  x.lim = c(-0.1, 5)
)

nodelabels(
  phylo_tree$node.label,
  cex = 0.8,
  bg = 'white'
)

Cladogram of Sustainability Metrics framework

Metadata Table

Using the table:

  • Click column headers to sort
  • Global search at top right, column search in each header
  • Change page length and page through results at the bottom
  • Use the download button to download a .csv file of the filtered table
Code
pacman::p_load(
  dplyr,
  reactable,
  stringr,
  htmltools
)

# Load full metadata table
metadata_all <- readRDS('data/sm_data.rds')[['metadata']]

# Pick out variables to display
metadata <- metadata_all %>% 
  select(
    metric,
    'Variable Name' = variable_name,
    definition,
    dimension,
    index,
    indicator,
    units,
    'Year' = latest_year, # Renaming latest year as year, not including og year
    source,
    scope,
    resolution,
    url
) %>% 
  setNames(c(str_to_title(names(.))))


###
htmltools::browsable(
  tagList(
    
    tags$div(
      style = "display: flex; gap: 16px; margin-bottom: 20px; justify-content: center;",
      
      tags$button(
        class = "btn btn-primary",
        style = "display: flex; align-items: center; gap: 8px; padding: 8px 12px;",
        tagList(fontawesome::fa("download"), "Show/hide more columns"),
        onclick = "Reactable.setHiddenColumns('metadata_table', prevColumns => {
          return prevColumns.length === 0 ? ['Definition', 'Scope', 'Resolution', 'Url'] : []
        })"
      ),
      
      tags$button(
        class = "btn btn-primary",
        style = "display: flex; align-items: center; gap: 8px; padding: 8px 12px;",
        tagList(fontawesome::fa("download"), "Download as CSV"),
        onclick = "Reactable.downloadDataCSV('metadata_table', 'sustainability_metadata.csv')"
      )
    ),
    
    reactable(
      metadata,
      sortable = TRUE,
      resizable = TRUE,
      filterable = TRUE,
      searchable = TRUE,
      pagination = TRUE,
      bordered = TRUE,
      wrap = TRUE,
      rownames = FALSE,
      onClick = 'select',
      striped = TRUE,
      pageSizeOptions = c(5, 10, 25, 50, 100),
      defaultPageSize = 5,
      showPageSizeOptions = TRUE,
      highlight = TRUE,
      style = list(fontSize = "14px"),
      compact = TRUE,
      columns = list(
        # Dimension = colDef(
          # minWidth = 75,
          # sticky = 'left'
        # ),
        # Index = colDef(
          # minWidth = 75,
          # sticky = 'left'
        # ),
        # Indicator = colDef(
          # minWidth = 100,
          # sticky = 'left'
        # ),
        Metric = colDef(
          minWidth = 200,
          sticky = 'left'
        ),
        'Variable Name' = colDef(
          minWidth = 150
        ),
        Definition = colDef(
          minWidth = 250
        ),
        # Units = colDef(minWidth = 50),
        # Year = colDef(minWidth = 75),
        'Latest Year' = colDef(minWidth = 75),
        Source = colDef(minWidth = 250),
        Scope = colDef(show = FALSE),
        Resolution = colDef(show = FALSE),
        Url = colDef(
          minWidth = 300,
          show = FALSE
        )
      ),
      defaultColDef = colDef(minWidth = 100),
      elementId = "metadata_table",
      details = function(index) {
        div(
          style = "padding: 15px; border: 1px solid #ddd; margin: 10px 0;
             background-color: #E0EEEE; border-radius: 10px; border-color: black;
             box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);",
          
          tags$h4(
            strong("Details"), 
          ),
          tags$p(
            strong('Metric Name: '), 
            as.character(metadata_all[index, 'metric']),
          ),
          tags$p(
            strong('Variable Name: '), 
            as.character(metadata_all[index, 'variable_name']),
          ),
          tags$p(
            strong('Definition: '), 
            as.character(metadata_all[index, 'definition']),
          ),
          tags$p(
            strong('Source: '), 
            as.character(metadata_all[index, 'source'])
          ),
          tags$p(
            strong('Latest Year: '), 
            as.character(metadata_all[index, 'latest_year'])
          ),
          tags$p(
            strong('All Years (cleaned, wrangled, and included): '), 
            as.character(metadata_all[index, 'year'])
          ),
          tags$p(
            strong('Updates: '), 
            str_to_title(as.character(metadata_all[index, 'updates']))
          ),
          tags$p(
            strong('URL: '), 
            tags$a(
              href = as.character(metadata_all[index, 'url']),
              target = '_blank',
              as.character(metadata_all[index, 'url'])
            )
          )
        )
      }
    )
  )
)

Data Table

Code
pacman::p_load(
  dplyr,
  reactable,
  stringr,
  htmltools
)

# Load metrics and metadata
metadata_all <- readRDS('data/sm_data.rds')[['metadata']]
metrics <- readRDS('data/sm_data.rds')[['metrics']]
fips_key <- readRDS('data/sm_data.rds')[['fips_key']]

# Join relevant metadata to metrics table
econ_metrics <- metrics %>% 
  left_join(metadata_all, by = join_by('variable_name')) %>% 
  filter(dimension == 'economics') %>% 
  left_join(fips_key, by = join_by('fips')) %>% 
  mutate(county_name = ifelse(is.na(county_name), state_name, county_name)) %>% 
  mutate(
    # Clean up value formatting
    units = case_when(
      units %in% c('count', 'acres', 'usd') & as.numeric(value) > 1e6 ~ paste(units, '(x1000)'),
      .default = units
    ),
    value = case_when(
      str_detect(units, '(x1000)') ~ as.character(round(as.numeric(value) / 1000)),
      # str_detect(units, 'count|acres|usd') ~ format(round(as.numeric(value), 1), big.mark = ','),
      str_detect(units, 'count') ~ format(value, big.mark = ','),
      str_detect(units, 'usd') ~ paste0('$', value),
      units == 'binary' ~ ifelse(value == 1, 'True', 'False'),
      units == 'age' ~ paste(value, 'years'),
      units == 'percentage' ~ paste('%', value),
      .default = value
    )
  ) %>%
  select(
    metric,
    'Variable Name' = variable_name,
    definition,
    year = year.x,
    Area = county_name,
    value,
    units
  ) %>% 
  setNames(c(str_to_title(names(.)))) %>% 
  filter(!is.na(Value))

get_str(econ_metrics)
tibble [132,504 × 7] (S3: tbl_df/tbl/data.frame)
 $ Metric       : chr [1:132504] "Civilian Labor Force" "Number Employed" "Nu"..
 $ Variable Name: chr [1:132504] "civLaborForce" "employed" "unemployed" "une"..
 $ Definition   : chr [1:132504] "Number of civilians age 16 or older who are"..
 $ Year         : chr [1:132504] "2000" "2000" "2000" "2000" "2001" "2001" "2"..
 $ Area         : chr [1:132504] "US" "US" "US" "US" "US" "US" "US" "US" "US""..
 $ Value        : chr [1:132504] "142602" "136905" "5697" "% 4" "143787" "136"..
 $ Units        : chr [1:132504] "count (x1000)" "count (x1000)" "count (x100"..
Code
econ_metrics
# A tibble: 132,504 × 7
   Metric               `Variable Name`  Definition      Year  Area  Value Units
   <chr>                <chr>            <chr>           <chr> <chr> <chr> <chr>
 1 Civilian Labor Force civLaborForce    Number of civi… 2000  US    1426… coun…
 2 Number Employed      employed         People are cla… 2000  US    1369… coun…
 3 Number unemployed    unemployed       Meet all of th… 2000  US    5697  coun…
 4 Unemployment Rate    unemploymentRate Number of unem… 2000  US    % 4   perc…
 5 Civilian Labor Force civLaborForce    Number of civi… 2001  US    1437… coun…
 6 Number Employed      employed         People are cla… 2001  US    1369… coun…
 7 Number unemployed    unemployed       Meet all of th… 2001  US    6809  coun…
 8 Unemployment Rate    unemploymentRate Number of unem… 2001  US    % 4.7 perc…
 9 Civilian Labor Force civLaborForce    Number of civi… 2002  US    1448… coun…
10 Number Employed      employed         People are cla… 2002  US    1364… coun…
# ℹ 132,494 more rows
Code
econ_metrics %>% 
  select(Metric, Units) %>% 
  print(n = 75)
# A tibble: 132,504 × 2
   Metric               Units        
   <chr>                <chr>        
 1 Civilian Labor Force count (x1000)
 2 Number Employed      count (x1000)
 3 Number unemployed    count (x1000)
 4 Unemployment Rate    percentage   
 5 Civilian Labor Force count (x1000)
 6 Number Employed      count (x1000)
 7 Number unemployed    count (x1000)
 8 Unemployment Rate    percentage   
 9 Civilian Labor Force count (x1000)
10 Number Employed      count (x1000)
11 Number unemployed    count (x1000)
12 Unemployment Rate    percentage   
13 Civilian Labor Force count (x1000)
14 Number Employed      count (x1000)
15 Number unemployed    count (x1000)
16 Unemployment Rate    percentage   
17 Civilian Labor Force count (x1000)
18 Number Employed      count (x1000)
19 Number unemployed    count (x1000)
20 Unemployment Rate    percentage   
21 Civilian Labor Force count (x1000)
22 Number Employed      count (x1000)
23 Number unemployed    count (x1000)
24 Unemployment Rate    percentage   
25 Civilian Labor Force count (x1000)
26 Number Employed      count (x1000)
27 Number unemployed    count (x1000)
28 Unemployment Rate    percentage   
29 Civilian Labor Force count (x1000)
30 Number Employed      count (x1000)
31 Number unemployed    count (x1000)
32 Unemployment Rate    percentage   
33 Civilian Labor Force count (x1000)
34 Number Employed      count (x1000)
35 Number unemployed    count (x1000)
36 Unemployment Rate    percentage   
37 Civilian Labor Force count (x1000)
38 Number Employed      count (x1000)
39 Number unemployed    count (x1000)
40 Unemployment Rate    percentage   
41 Civilian Labor Force count (x1000)
42 Number Employed      count (x1000)
43 Number unemployed    count (x1000)
44 Unemployment Rate    percentage   
45 Civilian Labor Force count (x1000)
46 Number Employed      count (x1000)
47 Number unemployed    count (x1000)
48 Unemployment Rate    percentage   
49 Civilian Labor Force count (x1000)
50 Number Employed      count (x1000)
51 Number unemployed    count (x1000)
52 Unemployment Rate    percentage   
53 Civilian Labor Force count (x1000)
54 Number Employed      count (x1000)
55 Number unemployed    count (x1000)
56 Unemployment Rate    percentage   
57 Civilian Labor Force count (x1000)
58 Number Employed      count (x1000)
59 Number unemployed    count (x1000)
60 Unemployment Rate    percentage   
61 Civilian Labor Force count (x1000)
62 Number Employed      count (x1000)
63 Number unemployed    count (x1000)
64 Unemployment Rate    percentage   
65 Civilian Labor Force count (x1000)
66 Number Employed      count (x1000)
67 Number unemployed    count (x1000)
68 Unemployment Rate    percentage   
69 Civilian Labor Force count (x1000)
70 Number Employed      count (x1000)
71 Number unemployed    count (x1000)
72 Unemployment Rate    percentage   
73 Civilian Labor Force count (x1000)
74 Number Employed      count (x1000)
75 Number unemployed    count (x1000)
# ℹ 132,429 more rows
Code
## Reactable table
htmltools::browsable(
  tagList(
    
    tags$div(
      style = "display: flex; gap: 16px; margin-bottom: 20px; justify-content: center;",
      tags$button(
        class = "btn btn-primary",
        style = "display: flex; align-items: center; gap: 8px; padding: 8px 12px;",
        tagList(fontawesome::fa("download"), "Download as CSV"),
        onclick = "Reactable.downloadDataCSV('metrics_table', 'sustainability_metrics.csv')"
      )
    ),
    
    reactable(
      econ_metrics,
      sortable = TRUE,
      resizable = TRUE,
      filterable = TRUE,
      searchable = TRUE,
      pagination = TRUE,
      bordered = TRUE,
      wrap = TRUE,
      rownames = FALSE,
      onClick = 'select',
      striped = TRUE,
      pageSizeOptions = c(5, 10, 25, 50, 100),
      defaultPageSize = 5,
      showPageSizeOptions = TRUE,
      highlight = TRUE,
      style = list(fontSize = "14px"),
      compact = TRUE,
      columns = list(
        Metric = colDef(
          minWidth = 125,
          sticky = 'left'
        ),
        'Variable Name' = colDef(
          minWidth = 125
        ),
        Definition = colDef(
          minWidth = 250
        ),
        Units = colDef(minWidth = 100),
        'Year' = colDef(minWidth = 100)
      ),
      defaultColDef = colDef(minWidth = 100),
      elementId = "metrics_table"
    )
  )
)
Back to top